suppressWarnings(suppressPackageStartupMessages({
    library(knitr)
    library(dplyr)
    library(TwoSampleMR)
    library(ggplot2)
    library(ggthemes)
    library(magrittr)
    library(ggrepel)
}))
opts_chunk$set(cache=TRUE, echo=TRUE, message=FALSE, warning=FALSE)
threshold1 <- 0.05 / (300000 * 698)
threshold2 <- 0.05 / (300000)
load("../results/mr_disc_repl.rdata")
load("../../05_cis-trans-networks/data/outcomes.rdata")
ao <- ao %>% filter(access != "developer")
ggplot(mr_disc_repl %>% filter(!is.na(z.repl)), aes(y=z.repl, x=z.disc)) +
geom_point(aes(colour=fdr < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm") +
ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\nFDR < 0.05")

sub <- subset(mr_disc_repl, abs(b.repl) > 1.5 & abs(b.repl) < 20 | (abs(b.disc) > 10 & b.repl < 0) & b.repl > -50 ) %>% 
    tidyr::separate(outcome.disc, sep=" \\|", into=c("label", "other"))
ggplot(mr_disc_repl %>% filter(!is.na(z.repl), b.repl > -50), aes(y=b.repl, x=b.disc)) +
geom_point(aes(colour=fdr < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm") +
geom_text_repel(data=sub, aes(label=label)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\nFDR < 0.05")

summary(lm(b.repl ~ b.disc,  mr_disc_repl))
## 
## Call:
## lm(formula = b.repl ~ b.disc, data = mr_disc_repl)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3400 -0.1407 -0.0143  0.1115  4.5271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.002049   0.022043   0.093    0.926    
## b.disc      0.162850   0.001456 111.882   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4619 on 438 degrees of freedom
## Multiple R-squared:  0.9662, Adjusted R-squared:  0.9661 
## F-statistic: 1.252e+04 on 1 and 438 DF,  p-value: < 2.2e-16
summary(lm(b.repl ~ b.disc,  mr_disc_repl %>% filter(b.repl > -50)))
## 
## Call:
## lm(formula = b.repl ~ b.disc, data = mr_disc_repl %>% filter(b.repl > 
##     -50))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0564 -0.0644 -0.0101  0.0433  5.0623 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.007226   0.019227   0.376   0.7072  
## b.disc      0.022295   0.011990   1.859   0.0636 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4028 on 437 degrees of freedom
## Multiple R-squared:  0.00785,    Adjusted R-squared:  0.00558 
## F-statistic: 3.458 on 1 and 437 DF,  p-value: 0.06363

Are there more small p-values than expected?

binom.test(x=sum(mr_disc_repl$pval.repl < 0.05, na.rm=T), n=nrow(mr_disc_repl), p=0.05)
## 
##  Exact binomial test
## 
## data:  sum(mr_disc_repl$pval.repl < 0.05, na.rm = T) and nrow(mr_disc_repl)
## number of successes = 71, number of trials = 440, p-value <
## 2.2e-16
## alternative hypothesis: true probability of success is not equal to 0.05
## 95 percent confidence interval:
##  0.1282295 0.1991290
## sample estimates:
## probability of success 
##              0.1613636

What about direction of effect?

binom.test(x=sum(sign(mr_disc_repl$b.disc) == sign(mr_disc_repl$b.repl)), n=nrow(mr_disc_repl), p=0.5)
## 
##  Exact binomial test
## 
## data:  sum(sign(mr_disc_repl$b.disc) == sign(mr_disc_repl$b.repl)) and nrow(mr_disc_repl)
## number of successes = 234, number of trials = 440, p-value = 0.198
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.4839776 0.5792282
## sample estimates:
## probability of success 
##              0.5318182

What about direction of effect for just significant assocs?

subset(mr_disc_repl, pval.repl < 0.05) %$% binom.test(x=sum(sign(b.disc) == sign(b.repl)), n=length(pval.repl), p=0.5)
## 
##  Exact binomial test
## 
## data:  sum(sign(b.disc) == sign(b.repl)) and length(pval.repl)
## number of successes = 43, number of trials = 71, p-value = 0.09592
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.4825140 0.7196524
## sample estimates:
## probability of success 
##              0.6056338

Is small p-values in repl related to trait type

inner_join(mr_disc_repl, subset(ao, select=c(id, subcategory)), by=c("id.outcome"="id")) %$% summary(lm(z.repl ~ subcategory))
## 
## Call:
## lm(formula = z.repl ~ subcategory)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.4025 -0.8777 -0.0020  0.8897 13.9125 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            0.02409    0.17531   0.137  0.89078
## subcategoryAutoimmune / inflammatory   0.10602    0.22659   0.468  0.64010
## subcategoryBone                       -0.34401    1.32355  -0.260  0.79505
## subcategoryCardiovascular             -0.60537    1.08540  -0.558  0.57732
## subcategoryDiabetes                   -1.18981    1.08540  -1.096  0.27362
## subcategoryEducation                   0.04921    0.77744   0.063  0.94956
## subcategoryGlycemic                   -0.53017    0.94406  -0.562  0.57469
## subcategoryHaemotological             -0.25413    0.29368  -0.865  0.38735
## subcategoryHemodynamic                -0.36184    1.32355  -0.273  0.78469
## subcategoryKidney                      0.50155    1.32355   0.379  0.70492
## subcategoryLipid                      -0.01673    0.33284  -0.050  0.95993
## subcategoryMetal                      -0.09363    0.94406  -0.099  0.92104
## subcategoryPsychiatric / neurological -1.41153    0.44118  -3.199  0.00148
## subcategoryReproductive aging         -0.19332    0.67897  -0.285  0.77599
##                                         
## (Intercept)                             
## subcategoryAutoimmune / inflammatory    
## subcategoryBone                         
## subcategoryCardiovascular               
## subcategoryDiabetes                     
## subcategoryEducation                    
## subcategoryGlycemic                     
## subcategoryHaemotological               
## subcategoryHemodynamic                  
## subcategoryKidney                       
## subcategoryLipid                        
## subcategoryMetal                        
## subcategoryPsychiatric / neurological **
## subcategoryReproductive aging           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.855 on 425 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0345, Adjusted R-squared:  0.004968 
## F-statistic: 1.168 on 13 and 425 DF,  p-value: 0.3003
inner_join(mr_disc_repl, subset(ao, select=c(id, subcategory)), by=c("id.outcome"="id")) %>% mutate(sig=pval.repl < 0.05) %$% summary(glm(sig ~ subcategory))
## 
## Call:
## glm(formula = sig ~ subcategory)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.50000  -0.17262  -0.14970  -0.09302   0.90698  
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            0.17857    0.03483   5.127 4.48e-07
## subcategoryAutoimmune / inflammatory  -0.02887    0.04502  -0.641   0.5217
## subcategoryBone                       -0.17857    0.26296  -0.679   0.4974
## subcategoryCardiovascular              0.15476    0.21564   0.718   0.4733
## subcategoryDiabetes                   -0.17857    0.21564  -0.828   0.4081
## subcategoryEducation                  -0.01190    0.15446  -0.077   0.9386
## subcategoryGlycemic                   -0.17857    0.18756  -0.952   0.3416
## subcategoryHaemotological             -0.01728    0.05835  -0.296   0.7672
## subcategoryHemodynamic                -0.17857    0.26296  -0.679   0.4974
## subcategoryKidney                     -0.17857    0.26296  -0.679   0.4974
## subcategoryLipid                      -0.08555    0.06613  -1.294   0.1965
## subcategoryMetal                       0.32143    0.18756   1.714   0.0873
## subcategoryPsychiatric / neurological  0.15476    0.08765   1.766   0.0782
## subcategoryReproductive aging         -0.05357    0.13489  -0.397   0.6915
##                                          
## (Intercept)                           ***
## subcategoryAutoimmune / inflammatory     
## subcategoryBone                          
## subcategoryCardiovascular                
## subcategoryDiabetes                      
## subcategoryEducation                     
## subcategoryGlycemic                      
## subcategoryHaemotological                
## subcategoryHemodynamic                   
## subcategoryKidney                        
## subcategoryLipid                         
## subcategoryMetal                      .  
## subcategoryPsychiatric / neurological .  
## subcategoryReproductive aging            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1358652)
## 
##     Null deviance: 59.517  on 438  degrees of freedom
## Residual deviance: 57.743  on 425  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 385.32
## 
## Number of Fisher Scoring iterations: 2
inner_join(mr_disc_repl, subset(ao, select=c(id, subcategory)), by=c("id.outcome"="id")) %>% mutate(sig=pval.repl < 0.05, blood = subcategory %in% c("Haemotological", "Metal", "Protein")) %$% summary(glm(sig ~ blood))
## 
## Call:
## glm(formula = sig ~ blood)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1818  -0.1582  -0.1582  -0.1582   0.8418  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.15818    0.01910    8.28 1.51e-15 ***
## bloodTRUE    0.02364    0.04927    0.48    0.632    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.136123)
## 
##     Null deviance: 59.517  on 438  degrees of freedom
## Residual deviance: 59.486  on 437  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 374.37
## 
## Number of Fisher Scoring iterations: 2

Which associations appear to be working?

mr_disc_repl$fdr <- p.adjust(mr_disc_repl$pval.repl, "fdr")
mr_disc_repl$concordant <- sign(mr_disc_repl$b.repl) == sign(mr_disc_repl$b.disc)
subset(mr_disc_repl, fdr < 0.05 & concordant)
##     id.exposure.disc id.outcome                           outcome.disc
## 28            3PlZT3         89                        Height || id:89
## 142           efgDzZ         30               Crohn's disease || id:30
## 144           egeEio        288 Systemic lupus erythematosus || id:288
## 170           gagCNW        288 Systemic lupus erythematosus || id:288
## 171           gdPW6i         89                        Height || id:89
## 172           Gi2MaJ         31    Inflammatory bowel disease || id:31
## 174           gi6WQc       1004            Age at menopause || id:1004
## 179           GvMT1T         89                        Height || id:89
## 199           InWOdR         89                        Height || id:89
## 228           KCSSjH         89                        Height || id:89
## 245           L8b1mm         22                 Schizophrenia || id:22
## 250           lnuS95        302                Triglycerides || id:302
## 297           nUzS4H        288 Systemic lupus erythematosus || id:288
## 314           oMhO90         89                        Height || id:89
## 360           Sj8U1Q         22                 Schizophrenia || id:22
## 429           VqZYFG        833         Rheumatoid arthritis || id:833
## 432           vS7Pql        288 Systemic lupus erythematosus || id:288
## 442           wHkrOx         22                 Schizophrenia || id:22
##       exposure method.disc nsnp.disc       b.disc     se.disc
## 28  cg09140281  Wald ratio         1  -0.34818193 0.032770064
## 142 cg21815063  Wald ratio         1   1.60299695 0.183137484
## 144 cg21685006  Wald ratio         1 -10.16842077 0.437970971
## 170 cg09198277  Wald ratio         1   3.84448131 0.576116960
## 171 cg16205897  Wald ratio         1  -0.05151995 0.007985593
## 172 cg24173182  Wald ratio         1   0.83077806 0.127757494
## 174 cg26170066  Wald ratio         1  -1.54908135 0.172120150
## 179 cg23891487  Wald ratio         1   0.60939718 0.049859769
## 199 cg14196790  Wald ratio         1   0.09185566 0.010298968
## 228 cg20135002  Wald ratio         1   0.11570999 0.015427999
## 245 cg13127506  Wald ratio         1  -0.32087002 0.049854803
## 250 cg13598010  Wald ratio         1  -0.52820358 0.041549937
## 297 cg21342636  Wald ratio         1  -1.51322718 0.120745331
## 314 cg05389922  Wald ratio         1   0.22654627 0.009665974
## 360 cg02207219  Wald ratio         1  -1.64419351 0.163951659
## 429 cg10636959  Wald ratio         1   1.01457989 0.150001749
## 432 cg19972273  Wald ratio         1   2.93002580 0.359355666
## 442 cg09012858  Wald ratio         1  -0.87055635 0.120905513
##         pval.disc id.exposure.repl                           outcome.repl
## 28   2.280122e-26           3PlZT3                        Height || id:89
## 142  2.078095e-18           iIZEzg               Crohn's disease || id:30
## 144 3.058642e-119           6JDJQj Systemic lupus erythematosus || id:288
## 170  2.504698e-11           gagCNW Systemic lupus erythematosus || id:288
## 171  1.106660e-10           gdPW6i                        Height || id:89
## 172  7.885243e-11           Gi2MaJ    Inflammatory bowel disease || id:31
## 174  2.257177e-19           78Z5u1            Age at menopause || id:1004
## 179  2.365294e-34           x6P4Zf                        Height || id:89
## 199  4.708619e-19           InWOdR                        Height || id:89
## 228  6.381783e-14           KCSSjH                        Height || id:89
## 245  1.225900e-10           L8b1mm                 Schizophrenia || id:22
## 250  5.039709e-37           lnuS95                Triglycerides || id:302
## 297  4.964581e-36           nUzS4H Systemic lupus erythematosus || id:288
## 314 1.772963e-121           jGgxyS                        Height || id:89
## 360  1.142085e-23           Sj8U1Q                 Schizophrenia || id:22
## 429  1.344306e-11           VqZYFG         Rheumatoid arthritis || id:833
## 432  3.533826e-16           vS7Pql Systemic lupus erythematosus || id:288
## 442  6.007882e-13           bdNGRD                 Schizophrenia || id:22
##     method.repl nsnp.repl      b.repl     se.repl    pval.repl     z.disc
## 28   Wald ratio         1 -0.18638449 0.048459967 1.199864e-04 -10.625000
## 142  Wald ratio         1  0.51884832 0.148829372 4.899424e-04   8.752970
## 144  Wald ratio         1 -0.78729726 0.206858802 1.412526e-04 -23.217111
## 170  Wald ratio         1  5.15523076 0.367112890 8.547957e-45   6.673092
## 171  Wald ratio         1 -0.06202405 0.008090093 1.765237e-14  -6.451613
## 172  Wald ratio         1  0.57397314 0.156662360 2.485406e-04   6.502774
## 174  Wald ratio         1 -0.49206625 0.164022084 2.699796e-03  -9.000000
## 179  Wald ratio         1  0.08820080 0.027210884 1.189528e-03  12.222222
## 199  Wald ratio         1  0.03668253 0.008803807 3.090859e-05   8.918919
## 228  Wald ratio         1  0.04266806 0.008031635 1.081314e-07   7.500000
## 245  Wald ratio         1 -0.33816315 0.079766439 2.241016e-05  -6.436090
## 250  Wald ratio         1 -0.18336038 0.043657234 2.669150e-05 -12.712500
## 297  Wald ratio         1 -1.89876058 0.284147449 2.352091e-11 -12.532387
## 314  Wald ratio         1  0.08613678 0.019073144 6.298030e-06  23.437500
## 360  Wald ratio         1 -0.93899660 0.114075988 1.851529e-16 -10.028526
## 429  Wald ratio         1  0.79337482 0.135795364 5.144501e-09   6.763787
## 432  Wald ratio         1  4.05716406 0.731101941 2.866809e-08   8.153554
## 442  Wald ratio         1 -0.42167233 0.044308118 1.785615e-21  -7.200303
##        z.repl            code   z.disc2   z.repl2          fdr concordant
## 28  -3.846154   89 cg09140281 10.625000  3.846154 2.772318e-03       TRUE
## 142  3.486196   30 cg21815063  8.752970  3.486196 9.776578e-03       TRUE
## 144 -3.805965  288 cg21685006 23.217111  3.805965 3.100495e-03       TRUE
## 170 14.042631  288 cg09198277  6.673092 14.042631 3.752553e-42       TRUE
## 171 -7.666667   89 cg16205897  6.451613  7.666667 1.549878e-12       TRUE
## 172  3.663759   31 cg24173182  6.502774  3.663759 5.195682e-03       TRUE
## 174 -3.000000 1004 cg26170066  9.000000  3.000000 3.950702e-02       TRUE
## 179  3.241379   89 cg23891487 12.222222  3.241379 2.008472e-02       TRUE
## 199  4.166667   89 cg14196790  8.918919  4.166667 8.480545e-04       TRUE
## 228  5.312500   89 cg20135002  7.500000  5.312500 4.315427e-06       TRUE
## 245 -4.239416   22 cg13127506  6.436090  4.239416 7.027187e-04       TRUE
## 250 -4.200000  302 cg13598010 12.712500  4.200000 7.811712e-04       TRUE
## 297 -6.682307  288 cg21342636 12.532387  6.682307 1.720947e-09       TRUE
## 314  4.516129   89 cg05389922 23.437500  4.516129 2.126796e-04       TRUE
## 360 -8.231326   22 cg02207219 10.028526  8.231326 2.032054e-14       TRUE
## 429  5.842429  833 cg10636959  6.763787  5.842429 3.226337e-07       TRUE
## 432  5.549382  288 cg19972273  8.153554  5.549382 1.398366e-06       TRUE
## 442 -9.516819   22 cg09012858  7.200303  9.516819 3.919425e-19       TRUE
subset(mr_disc_repl, fdr < 0.05 & !concordant)
##     id.exposure.disc id.outcome                           outcome.disc
## 11            1GUauY        273             Mean cell volume || id:273
## 16            1zjnhP        302                Triglycerides || id:302
## 17            2CGr2i        833         Rheumatoid arthritis || id:833
## 136           dRg6Vv         22                 Schizophrenia || id:22
## 164           fhqQsd         89                        Height || id:89
## 167           Fv8Jey       1054                        Gout || id:1054
## 192           I8U5cS         30               Crohn's disease || id:30
## 193           I8U5cS         31    Inflammatory bowel disease || id:31
## 277           N4u6Ql         89                        Height || id:89
## 351           Rlql9S        288 Systemic lupus erythematosus || id:288
## 375           tnuBQA         89                        Height || id:89
## 498           yYf8g3         89                        Height || id:89
##       exposure method.disc nsnp.disc     b.disc    se.disc    pval.disc
## 11  cg14026106  Wald ratio         1 -0.9837641 0.15379763 1.589951e-10
## 16  cg12966376  Wald ratio         1 -0.1995959 0.02724859 2.388987e-13
## 17  cg07790778  Wald ratio         1  1.2599408 0.16651908 3.838711e-14
## 136 cg06507285  Wald ratio         1  0.5529986 0.08689836 1.968995e-10
## 164 cg07097417  Wald ratio         1  0.1526297 0.02081314 2.244976e-13
## 167 cg24973591  Wald ratio         1  2.0543448 0.27117352 3.570380e-14
## 192 cg18608055  Wald ratio         1 -1.1049519 0.16432633 1.766422e-11
## 193 cg18608055  Wald ratio         1 -0.8349481 0.12566131 3.043864e-11
## 277 cg15270729  Wald ratio         1 -0.1085069 0.01345485 7.352650e-16
## 351 cg22097941  Wald ratio         1 -2.1154677 0.29173200 4.124620e-13
## 375 cg04606053  Wald ratio         1  0.1793291 0.02758910 8.032001e-11
## 498 cg23760103  Wald ratio         1  0.2046989 0.01998251 1.260473e-24
##     id.exposure.repl                           outcome.repl method.repl
## 11            1GUauY             Mean cell volume || id:273  Wald ratio
## 16            1zjnhP                Triglycerides || id:302  Wald ratio
## 17            2CGr2i         Rheumatoid arthritis || id:833  Wald ratio
## 136           NxskkE                 Schizophrenia || id:22  Wald ratio
## 164           54AdQv                        Height || id:89  Wald ratio
## 167           Fv8Jey                        Gout || id:1054  Wald ratio
## 192           u0Pbre               Crohn's disease || id:30  Wald ratio
## 193           u0Pbre    Inflammatory bowel disease || id:31  Wald ratio
## 277           Yy7qca                        Height || id:89  Wald ratio
## 351           Rlql9S Systemic lupus erythematosus || id:288  Wald ratio
## 375           I0Cr3X                        Height || id:89  Wald ratio
## 498           yYf8g3                        Height || id:89  Wald ratio
##     nsnp.repl     b.repl    se.repl    pval.repl    z.disc    z.repl
## 11          1  0.9803579 0.29081896 7.488913e-04 -6.396484  3.371025
## 16          1  0.2598365 0.06348278 4.257850e-05 -7.325000  4.093023
## 17          1 -0.8475167 0.26030234 1.130362e-03  7.566345 -3.255893
## 136         1 -0.2889436 0.09542444 2.461915e-03  6.363740 -3.027984
## 164         1 -0.0896861 0.02790234 1.307695e-03  7.333333 -3.214286
## 167         1 -2.0033987 0.37711034 1.081314e-07  7.575758 -5.312500
## 192         1  0.4968059 0.14941030 8.838321e-04 -6.724132  3.325111
## 193         1  0.4469365 0.10969364 4.613178e-05 -6.644433  4.074407
## 277         1  0.3239691 0.06633653 1.041024e-06 -8.064516  4.883721
## 351         1  0.6821828 0.21344427 1.393133e-03 -7.251408  3.196070
## 375         1 -0.1777637 0.02121696 5.366189e-17  6.500000 -8.378378
## 498         1 -0.2094551 0.03716138 1.736784e-08 10.243902 -5.636364
##                code   z.disc2   z.repl2          fdr concordant
## 11   273 cg14026106  6.396484 -3.371025 1.429406e-02      FALSE
## 16   302 cg12966376  7.325000 -4.093023 1.099527e-03      FALSE
## 17   833 cg07790778  7.566345 -3.255893 1.984915e-02      FALSE
## 136   22 cg06507285  6.363740 -3.027984 3.726830e-02      FALSE
## 164   89 cg07097417  7.333333 -3.214286 2.126215e-02      FALSE
## 167 1054 cg24973591  7.575758 -5.312500 4.315427e-06      FALSE
## 192   30 cg18608055  6.724132 -3.325111 1.616676e-02      FALSE
## 193   31 cg18608055  6.644433 -4.074407 1.125103e-03      FALSE
## 277   89 cg15270729  8.064516 -4.883721 3.808412e-05      FALSE
## 351  288 cg22097941  7.251408 -3.196070 2.184233e-02      FALSE
## 375   89 cg04606053  6.500000 -8.378378 7.852524e-15      FALSE
## 498   89 cg23760103 10.243902 -5.636364 9.530605e-07      FALSE

Schizophrenia has some consistent results but looking at all of them together is less convincing

ggplot(mr_disc_repl %>% filter(id.outcome == 22), aes(y=z.repl, x=z.disc)) +
geom_point(aes(colour=pval.repl < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm") +
ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05")

LDL cholesterol

ggplot(mr_disc_repl %>% filter(id.outcome == 300), aes(y=b.repl, x=b.disc)) +
geom_point(aes(colour=pval.repl < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm")

# ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05")
## $x
## [1] "Z-score primary MR"
## 
## $y
## [1] "Z-score secondary MR"
## 
## $colour
## [1] "Replication\np-value < 0.05"
## 
## attr(,"class")
## [1] "labels"

Systemic lupus erythematosus

ggplot(mr_disc_repl %>% filter(id.outcome == 288), aes(y=b.repl, x=b.disc)) +
geom_point(aes(colour=pval.repl < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm")

# ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05")
## $x
## [1] "Z-score primary MR"
## 
## $y
## [1] "Z-score secondary MR"
## 
## $colour
## [1] "Replication\np-value < 0.05"
## 
## attr(,"class")
## [1] "labels"

Get the comparisons for all traits for which there is at least one replication with FDR < 0.05

trait_list <- unique(subset(mr_disc_repl, pval.repl < 0.05)$id.outcome)
subset(mr_disc_repl, id.outcome %in% trait_list) %>% group_by(outcome.disc) %>% summarise(n=n()) %>% kable
outcome.disc n
Rheumatoid arthritis || id:833 12
Years of schooling || id:1001 6
Schizophrenia || id:22 20
Height || id:89 61
Crohn’s disease || id:30 53
Birth weight || id:1084 15
Inflammatory bowel disease || id:31 45
Mean platelet volume || id:1006 3
Ulcerative colitis || id:32 26
Systemic lupus erythematosus || id:288 22
Transferrin Saturation || id:1051 2
Mean cell haemoglobin || id:271 20
Extreme height || id:86 8
Mean cell volume || id:273 20
LDL cholesterol || id:300 7
Myocardial infarction || id:798 2
Triglycerides || id:302 12
Age at menopause || id:1004 5
Red blood cell count || id:275 17
Transferrin || id:1052 1
Gout || id:1054 1

Plot them

for(i in 1:length(trait_list))
{

    temp <- mr_disc_repl %>% filter(id.outcome == trait_list[i])
    temp$hla_disc <- FALSE
    temp$hla_repl <- FALSE
    for(j in 1:nrow(temp))
    {
        temp2 <- subset(discovery, id.outcome == trait_list[i] & exposure == temp$exposure[j])
        temp3 <- strsplit(temp2$SNP, split=":")[[1]]
        temp$hla_disc[j] <- temp3[1] == "chr6" & as.numeric(temp3[2]) > 24570005 & as.numeric(temp3[2]) < 38377657

        temp2 <- subset(replication, id.outcome == trait_list[i] & exposure == temp$exposure[j])
        temp3 <- strsplit(temp2$SNP, split=":")[[1]]
        temp$hla_repl[j] <- temp3[1] == "chr6" & as.numeric(temp3[2]) > 24570005 & as.numeric(temp3[2]) < 38377657
    }
    p1 <- ggplot(temp, aes(y=b.repl, x=b.disc)) +
    geom_point(aes(colour=pval.repl < 0.05, size=hla_disc, shape=hla_repl)) +
    geom_abline(slope=1, linetype="dotted") +
    geom_errorbar(width=0, color="grey", aes(ymin=b.repl-1.96*se.repl, ymax=b.repl+1.96*se.repl)) +
    geom_errorbarh(height=0, color="grey", aes(xmin=b.disc-1.96*se.disc, xmax=b.disc+1.96*se.disc)) +
    geom_smooth(method="lm") +
    labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05", title = unique(subset(mr_disc_repl, id.outcome == trait_list[i])$outcome.disc))
    print(p1)
}

Create table

out <- mr_disc_repl %$% data_frame(
    cpg = exposure,
    outcome = outcome.disc,
    beta.primary = b.disc,
    se.primary = se.disc,
    pval.primary = pval.disc,
    beta.secondary = b.repl,
    se.secondary = se.repl,
    pval.secondary = pval.repl
)

write.csv(out, file="../results/mr_primary_secondary.csv")